home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-02
/
tptc16.zip
/
TPCSCAN.INC
< prev
next >
Wrap
Text File
|
1993-01-04
|
16KB
|
665 lines
(*
* TPTC - Turbo Pascal to C translator
*
* (C) 1988 Samuel H. Smith (rev. 13-Feb-88)
*
*)
(********************************************************************)
(*
* lexical scanner
*
*)
function coctal(n: integer): anystring;
{convert an integer into a c style octal character literal}
function odigit(n: integer): char;
(* convert an integer into an octal digit *)
begin
odigit := chr( (n and 7) + ord('0') );
end;
begin
coctal := '''\' + odigit(n shr 6) + odigit(n shr 3) + odigit(n) + '''';
toktype := strng;
end;
(********************************************************************)
procedure getchar;
{consume the current char and get the next one}
begin
if read_include then
begin
if eof(inclfd) then
begin
close(inclfd);
read_include := false;
writeln(ofd[level]);
if includeinclude then
writeln(ofd[level],'/* end of ',incl_name,' */')
else
begin
discard_nested;
write(con,^M^J,LJUST(' ',level*2+15),srcfiles[level],^M);
end;
end
else
read(inclfd, nextc);
end;
if not read_include then
begin
if eof(infd) then
endfile;
read(infd, nextc);
end;
if nextc = ^J then
begin
inc(srclines[level]);
if (srclines[level] mod 6) = 0 then
write(con,srcfiles[level],'(',srclines[level],') '^M);
abortcheck;
end;
end;
(********************************************************************)
function usec: char;
{use up the current character(return it) and get
the next one from the input stream}
var
c: char;
begin
c := nextc;
getchar;
usec := c;
end;
(********************************************************************)
function newc(n: string40): string40;
{replace the current character with a different one and get the next
character from the input stream}
var
c: char;
begin
c := nextc;
getchar;
newc := n;
end;
(********************************************************************)
procedure scan_ident;
{scan an identifier; output is ltok; nextc is first character following
the identifier; toktype = identifier; this is the protocol for all of
the scan_xxxx procedures in the lexical analyzer}
begin
toktype := unknown;
ltok := '';
repeat
case nextc of
'A'..'Z':
begin
if map_lower then
nextc := chr( ord(nextc)+32 );
ltok := ltok + nextc;
getchar;
end;
'a'..'z', '0'..'9', '_','@':
ltok := ltok + usec;
else
toktype := identifier;
end;
until toktype = identifier;
end;
(********************************************************************)
procedure scan_preproc;
{scan a tshell preprocessor directive; same syntax as C already}
begin
write(ofd[level],'#');
repeat
write(ofd[level],nextc);
getchar;
until nextc = ^M;
getchar;
writeln(ofd[level]);
toktype := unknown;
end;
(********************************************************************)
procedure scan_number;
{scan a number; this also processes #nnn character literals, which are
converted into octal character literals. imbedded periods are processed,
and a special condition is noted for trailing periods. this is needed
for scanning the ".." keyword when used after numbers. an ungetchar
facility would be more general, but isn't needed anywhere else.
in pascal/mt+, #nnn is translated into nnnL }
var
hasdot: boolean;
octal: boolean;
islong: boolean;
begin
hasdot := false;
islong := false;
octal := false;
toktype := number;
(* check for preprocessor directives, character literals or long literals *)
if nextc = '#' then
begin
ltok := '';
if mt_plus then
islong := true
else
octal := true;
end;
getchar;
(* check for preprocessor directives *)
if octal and (nextc in ['a'..'z']) then
scan_preproc
else
repeat
case nextc of
'0'..'9':
ltok := ltok + usec;
'.':
if hasdot then
begin
if ltok[length(ltok)] = '.' then
begin
ltok[0] := pred(ltok[0]); {remove trailing ., part of ..}
if octal then
ltok := coctal(atoi(ltok));
extradot := true;
end;
exit;
end
else
begin
hasdot := true;
ltok := ltok + usec;
end;
else
begin
if octal then
ltok := coctal(atoi(ltok))
else
if islong then
ltok := ltok + 'L';
exit;
end;
end;
until true=false;
end;
(********************************************************************)
procedure scan_hat;
{scan tokens starting with ^ - returns ^X as a character literal
corresponding to the specified control character. returns ^ident as
an identifier with the leading ^ intact. also scans ^. and ^[.}
var
c: char;
begin
getchar;
if (nextc = '.') or (nextc = '[') then
ltok := '^' + usec {^. or ^[}
else
if nextc in ['A'..'Z','a'..'z','@'..'_'] then
begin
ltok := nextc;
scan_ident;
if length(ltok) = 1 then {^c = control char}
ltok := coctal( ord(upcase(ltok[1])) - ord('@') )
else
ltok := '^' + ltok; {^ident = pointer to ident}
end;
end;
(********************************************************************)
procedure scan_dot;
{scans tokens starting with "."; knows about the 'extra dot' condition
that comes up in number scanning. returns a token of either '.' or '..'}
begin
getchar;
if (nextc = '.') or extradot then
begin
ltok := '..';
extradot := false;
end;
if nextc = '.' then
getchar;
end;
(********************************************************************)
procedure scan_string;
{scans a literal string. processes imbedded quotes ala pascal. translates
the string into a C string with the proper escapes on imbedded quotes.
converts single character strings into character constants. these are
sometimes converted back to strings when the parser needs to}
begin
toktype := unknown;
ltok := '"';
getchar; {consume the open quote}
repeat
if nextc in [^J,^M] then
begin
syntax('Closing quote expected (scan_string)');
exit;
end;
if nextc = '''' then
begin
getchar; {consume the quote}
if nextc = '''' then
ltok := ltok + usec
{double quotes are coded as a single quote}
else
begin {end of string}
ltok := ltok + '"';
toktype := strng;
end;
end
else
if nextc = '"' then
ltok := ltok + newc('\"')
else
if nextc = '\' then
ltok := ltok + newc('\\')
else
ltok := ltok + usec;
until toktype = strng;
if length(ltok) = 3 then
begin
ltok[1] := '''';
ltok[3] := '''';
end;
if ltok = '"\""' then
ltok := '''"'''
else
if (ltok = '"''"') or (ltok = '''''''') then
ltok := '''\''''';
end;
(********************************************************************)
procedure scan_hex;
{scans a hex constant and returns it as a C style 0xHHHH literal}
begin
getchar; {consume the '$'}
ltok := '0x';
while nextc in ['0'..'9', 'A'..'F', 'a'..'f'] do
ltok := ltok + usec;
toktype := number;
end;
(********************************************************************)
procedure scan_pragma;
{scans a turbo pascal compiler option and translates it into a general
"pragma" ','nd. include directive is translated into the #include
','nd. returns with the first non-blank after the pragma}
var
code: char;
prag: anystring;
arg: anystring;
begin
repeat
if nextc = ',' then
newline;
getchar; {consume the $ or ,}
code := upcase(usec);
arg := usec;
if arg = '+' then
arg := 'ON'
else
if arg = '-' then
arg := 'OFF'
else
begin {decode numeric or string pragma params}
if arg = ' ' then
arg := '';
while not (nextc in [' ','*','}',',']) do
ltok := ltok + usec;
arg := arg;
end;
case code of
'I': if (arg = 'ON') or (arg = 'OFF') then
prag := '/* I(' + arg + ')' + ' */'
else
begin
prag := '#include "' + arg + '"' + ^M^J;
{$I-} assign(inclfd, arg);
reset(inclfd); {$I+}
if ioresult = 0 then
begin
read_include := true;
incl_name := arg;
if not includeinclude then
begin
write(ofd[level],prag,' ');
enter_nested;
srcfiles[level] := incl_name;
srclines[level] := 0;
write(con,^M^J,'':level*2+15,
srcfiles[level],^M);
end;
end;
end;
else prag := '/* ' + code + '(' + arg + ')' + ' */';
end;
write(ofd[level],prag,' ');
while nextc = ' ' do
getchar;
until nextc <> ',';
end;
(********************************************************************)
procedure scan_curlycomment;
{processes a curly-brace enclosed comment}
begin
getchar; {consume the open comment}
if nextc = '$' then
begin
scan_pragma;
if nextc = '}' then
begin
getchar;
exit;
end;
end;
write(ofd[level],' /* ');
while nextc <> '}' do
begin
write(ofd[level],nextc);
getchar;
end;
writeln(ofd[level],' */ ');
getchar; {consume the close comment}
end;
(********************************************************************)
procedure scan_parencomment;
{process a (* enclosed comment}
begin
getchar; {consume the *}
if nextc = '$' then
scan_pragma;
write(ofd[level],'/*');
repeat
write(ofd[level],nextc);
if nextc = '*' then
begin
getchar;
if nextc = ')' then
begin
writeln(ofd[level],'/ ');
getchar;
exit;
end;
end
else
getchar;
until true=false;
end;
(********************************************************************)
procedure scan_blanks;
{scan white space. this procedure sometimes passes whitespace to the
output. it keeps track of the indentation of the current line so it
can be used by newline}
var
linestart: boolean;
indent: anystring;
valid: boolean;
begin
linestart := false;
indent := '';
valid := false;
repeat
case nextc of
^J,^M: begin
if nospace=false then
write(ofd[level],nextc);
indent := '';
linestart := true;
getchar;
end;
' ',^I,^@,^L:
indent := indent + usec;
'#': if linestart then
begin
write(ofd[level],indent); {pass preprocessor directives}
indent := ''; {without change (single-line only)}
repeat
write(ofd[level],nextc);
getchar;
until nextc = ^M;
getchar;
writeln(ofd[level]);
end
else
valid := true;
else
valid := true;
end;
until valid;
if linestart then
begin
spaces := indent;
if nospace=false then
write(ofd[level],spaces);
end;
end;
(********************************************************************)
procedure scan_tok;
{scans the next lexical token; returns the token in ltok and toktype}
begin
scan_blanks;
toktype := unknown;
ltok := nextc;
case nextc of
'a'..'z',
'_', 'A'..'Z': scan_ident;
'''': scan_string;
'0'..'9': scan_number;
'#': begin
scan_number;
if toktype = unknown then
scan_tok; {in case of #directive}
end;
'$': scan_hex;
'<': begin
getchar;
if (nextc = '>') or (nextc = '=') then
ltok := '<' + usec;
end;
'>': begin
getchar;
if nextc = '=' then
ltok := '>' + usec;
end;
':': begin
getchar;
if nextc = '=' then
ltok := ':' + usec;
end;
'^': scan_hat;
'.': scan_dot;
'{': begin
scan_curlycomment;
scan_tok;
end;
'(': begin
getchar;
if nextc = '*' then
begin
scan_parencomment;
scan_tok;
end;
end;
else getchar; {consume the unknown char}
end;
end;
(********************************************************************)
procedure gettok;
{get the next input token; this is the top level of the lexical analyzer.
it returns ltok, tok(ltok in upper case), toktype. it translates BEGIN
and END into braces; it checks for statement and section keywords}
var
i: integer;
begin
scan_tok;
tok := ltok;
if toktype = identifier then
begin
stoupper(tok);
if tok = 'BEGIN' then
begin
tok := '{';
ltok := tok;
toktype := keyword;
exit;
end;
if tok = 'END' then
begin
tok := '}';
ltok := tok;
toktype := keyword;
exit;
end;
(* check for statement keywords *)
for i := 1 to nkeywords do
if tok = keywords[i] then
begin
toktype := keyword;
exit;
end;
end;
end;
(********************************************************************)
function usetok: string80;
{return (use) and consume current token}
var
tv: string80;
begin
tv := ltok;
gettok;
usetok := tv;
end;